1 /** 2 * Hash Map 3 * Copyright: © 2015 Economic Modeling Specialists, Intl. 4 * Authors: Brian Schott 5 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 6 */ 7 8 module containers.hashmap; 9 10 private import core.lifetime : move; 11 private import containers.internal.hash; 12 private import containers.internal.node : shouldAddGCRange; 13 private import std.experimental.allocator.mallocator : Mallocator; 14 private import std.traits : isBasicType, Unqual; 15 16 /** 17 * Associative array / hash map. 18 * Params: 19 * K = the key type 20 * V = the value type 21 * Allocator = the allocator type to use. Defaults to `Mallocator` 22 * hashFunction = the hash function to use on the keys 23 * supportGC = true if the container should support holding references to 24 * GC-allocated memory. 25 */ 26 struct HashMap(K, V, Allocator = Mallocator, alias hashFunction = generateHash!K, 27 bool supportGC = shouldAddGCRange!K || shouldAddGCRange!V, 28 bool storeHash = true) 29 { 30 this(this) @disable; 31 32 private import std.experimental.allocator.common : stateSize; 33 34 static if (stateSize!Allocator != 0) 35 { 36 this() @disable; 37 38 /** 39 * Use the given `allocator` for allocations. 40 */ 41 this(Allocator allocator) pure nothrow @nogc @safe 42 in 43 { 44 assert(allocator !is null, "Allocator must not be null"); 45 } 46 do 47 { 48 this.allocator = allocator; 49 } 50 51 /** 52 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount 53 * must be a power of two. 54 */ 55 this(size_t bucketCount, Allocator allocator) 56 in 57 { 58 assert(allocator !is null, "Allocator must not be null"); 59 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 60 } 61 do 62 { 63 this.allocator = allocator; 64 initialize(bucketCount); 65 } 66 67 invariant 68 { 69 assert(allocator !is null); 70 } 71 } 72 else 73 { 74 /** 75 * Constructs an HashMap with an initial bucket count of bucketCount. bucketCount 76 * must be a power of two. 77 */ 78 this(size_t bucketCount) 79 in 80 { 81 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 82 } 83 do 84 { 85 initialize(bucketCount); 86 } 87 } 88 89 ~this() nothrow 90 { 91 scope (failure) assert(false); 92 clear(); 93 } 94 95 /** 96 * Removes all items from the map 97 */ 98 void clear() 99 { 100 import std.experimental.allocator : dispose; 101 102 // always remove ranges from GC first before disposing of buckets, to 103 // prevent segfaults when the GC collects at an unfortunate time 104 static if (useGC) 105 GC.removeRange(buckets.ptr); 106 allocator.dispose(buckets); 107 108 buckets = null; 109 _length = 0; 110 } 111 112 /** 113 * Supports `aa[key]` syntax. 114 */ 115 ref opIndex(this This)(K key) 116 { 117 import std.conv : text; 118 import std.exception : enforce; 119 120 alias CET = ContainerElementType!(This, V, true); 121 size_t i; 122 auto n = find(key, i); 123 enforce(n !is null, "'" ~ text(key) ~ "' not found in HashMap"); 124 return *cast(CET*) &n.value; 125 } 126 127 /** 128 * Returns: `true` if there is an entry in this map for the given `key`, 129 * false otherwise. 130 */ 131 bool containsKey(this This)(K key) inout 132 { 133 size_t i; 134 return find(key, i) !is null; 135 } 136 137 /** 138 * Gets the value for the given key, or returns `defaultValue` if the given 139 * key is not present. 140 * 141 * Params: 142 * key = the key to look up 143 * value = the default value 144 * Returns: the value indexed by `key`, if present, or `defaultValue` otherwise. 145 */ 146 auto get(this This)(K key, lazy V defaultValue) 147 { 148 alias CET = ContainerElementType!(This, V); 149 150 size_t i; 151 auto n = find(key, i); 152 if (n is null) 153 return defaultValue; 154 return cast(CET) n.value; 155 } 156 157 /** 158 * If the given key does not exist in the HashMap, adds it with 159 * the value `defaultValue`. 160 * 161 * Params: 162 * key = the key to look up 163 * value = the default value 164 * Returns: a pointer to the value stored in the HashMap with the given key. 165 * The pointer is guaranteed to be valid only until the next HashMap 166 * modification. 167 */ 168 auto getOrAdd(this This)(K key, lazy V defaultValue) 169 { 170 alias CET = ContainerElementType!(This, V); 171 172 size_t i; 173 auto n = find(key, i); 174 if (n is null) 175 return cast(CET*) &insert(key, defaultValue).value; 176 else 177 return cast(CET*) &n.value; 178 } 179 180 /** 181 * Supports $(B aa[key] = value;) syntax. 182 */ 183 void opIndexAssign(V value, const K key) 184 { 185 insert(key, move(mutable(value))); 186 } 187 188 /** 189 * Supports $(B key in aa) syntax. 190 * 191 * Returns: pointer to the value corresponding to the given key, 192 * or null if the key is not present in the HashMap. 193 */ 194 inout(V)* opBinaryRight(string op)(const K key) inout nothrow @trusted if (op == "in") 195 { 196 size_t i; 197 auto n = find(key, i); 198 if (n is null) 199 return null; 200 return &(cast(inout) n).value; 201 } 202 203 /** 204 * Removes the value associated with the given key 205 * Returns: true if a value was actually removed. 206 */ 207 bool remove(K key) 208 { 209 size_t i; 210 auto n = find(key, i); 211 if (n is null) 212 return false; 213 static if (storeHash) 214 auto node = Node(n.hash, n.key); 215 else 216 auto node = Node(n.key); 217 immutable bool removed = buckets[i].remove(node); 218 if (removed) 219 _length--; 220 return removed; 221 } 222 223 /** 224 * Returns: the number of key/value pairs in this container. 225 */ 226 size_t length() const nothrow pure @property @safe @nogc 227 { 228 return _length; 229 } 230 231 /** 232 * Returns: `true` if there are no items in this container. 233 */ 234 bool empty() const nothrow pure @property @safe @nogc 235 { 236 return _length == 0; 237 } 238 239 /** 240 * Returns: a range of the keys in this map. 241 */ 242 auto byKey(this This)() inout @trusted 243 { 244 return MapRange!(This, IterType.key)(cast(Unqual!(This)*) &this); 245 } 246 247 /** 248 * Returns: a GC-allocated array filled with the keys contained in this map. 249 */ 250 K[] keys() const @property 251 out(result) 252 { 253 assert (result.length == _length); 254 } 255 do 256 { 257 import std.array : appender; 258 auto app = appender!(K[])(); 259 foreach (ref const bucket; buckets) 260 { 261 foreach (ref item; bucket) 262 app.put(cast(K) item.key); 263 } 264 return app.data; 265 } 266 267 268 /** 269 * Returns: a range of the values in this map. 270 */ 271 auto byValue(this This)() inout @trusted 272 { 273 return MapRange!(This, IterType.value)(cast(Unqual!(This)*) &this); 274 } 275 276 /// ditto 277 alias opSlice = byValue; 278 279 /** 280 * Returns: a GC-allocated array containing the values contained in this map. 281 */ 282 auto values(this This)() const @property 283 out(result) 284 { 285 assert (result.length == _length); 286 } 287 do 288 { 289 import std.array : appender; 290 auto app = appender!(ContainerElementType!(This, V)[])(); 291 foreach (ref const bucket; buckets) 292 { 293 foreach (item; bucket) 294 app.put(cast(ContainerElementType!(This, V)) item.value); 295 } 296 return app.data; 297 } 298 299 /** 300 * Returns: a range of the kev/value pairs in this map. The element type of 301 * this range is a struct with `key` and `value` fields. 302 */ 303 auto byKeyValue(this This)() inout @trusted 304 { 305 return MapRange!(This, IterType.both)(cast(Unqual!(This)*) &this); 306 } 307 308 /** 309 * Support for $(D foreach(key, value; aa) { ... }) syntax; 310 */ 311 int opApply(int delegate(const ref K, ref V) del) 312 { 313 int result = 0; 314 foreach (ref bucket; buckets) 315 foreach (ref node; bucket[]) 316 if ((result = del(*cast(K*)&node.key, *cast(V*)&node.value)) != 0) 317 return result; 318 return result; 319 } 320 321 /// ditto 322 int opApply(int delegate(const ref K, const ref V) del) const 323 { 324 int result = 0; 325 foreach (const ref bucket; buckets) 326 foreach (const ref node; bucket[]) 327 if ((result = del(*cast(K*)&node.key, *cast(V*)&node.value)) != 0) 328 return result; 329 return result; 330 } 331 332 /// ditto 333 int opApply(int delegate(ref V) del) 334 { 335 int result = 0; 336 foreach (ref bucket; buckets) 337 foreach (ref node; bucket[]) 338 if ((result = del(*cast(V*)&node.value)) != 0) 339 return result; 340 return result; 341 } 342 343 /// ditto 344 int opApply(int delegate(const ref V) del) const 345 { 346 int result = 0; 347 foreach (const ref bucket; buckets) 348 foreach (const ref node; bucket[]) 349 if ((result = del(*cast(V*)&node.value)) != 0) 350 return result; 351 return result; 352 } 353 354 mixin AllocatorState!Allocator; 355 356 private: 357 358 import std.experimental.allocator : make, makeArray; 359 import containers.unrolledlist : UnrolledList; 360 import containers.internal.storage_type : ContainerStorageType; 361 import containers.internal.element_type : ContainerElementType; 362 import containers.internal.mixins : AllocatorState; 363 import core.memory : GC; 364 365 enum bool useGC = supportGC && (shouldAddGCRange!K || shouldAddGCRange!V); 366 alias Hash = typeof({ K k = void; return hashFunction(k); }()); 367 368 static ref ContainerStorageType!T mutable(T)(ref T value) { return *cast(ContainerStorageType!T*)&value; } 369 370 enum IterType: ubyte 371 { 372 key, value, both 373 } 374 375 static struct MapRange(MapType, IterType Type) 376 { 377 static if (Type == IterType.both) 378 { 379 struct FrontType 380 { 381 ContainerElementType!(MapType, K) key; 382 ContainerElementType!(MapType, V) value; 383 } 384 } 385 else static if (Type == IterType.value) 386 alias FrontType = ContainerElementType!(MapType, V); 387 else static if (Type == IterType.key) 388 alias FrontType = ContainerElementType!(MapType, K); 389 else 390 static assert(false); 391 392 FrontType front() 393 { 394 static if (Type == IterType.both) 395 return FrontType(cast(ContainerElementType!(MapType, K)) bucketRange.front.key, 396 cast(ContainerElementType!(MapType, V)) bucketRange.front.value); 397 else static if (Type == IterType.value) 398 return cast(ContainerElementType!(MapType, V)) bucketRange.front.value; 399 else static if (Type == IterType.key) 400 return cast(ContainerElementType!(MapType, K)) bucketRange.front.key; 401 else 402 static assert(false); 403 } 404 405 bool empty() const pure nothrow @nogc @property 406 { 407 return _empty; 408 } 409 410 void popFront() pure nothrow @nogc 411 { 412 bucketRange.popFront(); 413 if (bucketRange.empty) 414 { 415 while (bucketRange.empty) 416 { 417 bucketIndex++; 418 if (bucketIndex >= hm.buckets.length) 419 { 420 _empty = true; 421 break; 422 } 423 else 424 bucketRange = hm.buckets[bucketIndex][]; 425 } 426 } 427 } 428 429 private: 430 431 this(Unqual!(MapType)* hm) 432 { 433 this.hm = hm; 434 this.bucketIndex = 0; 435 bucketRange = typeof(bucketRange).init; 436 this._empty = false; 437 438 while (true) 439 { 440 if (bucketIndex >= hm.buckets.length) 441 { 442 _empty = true; 443 break; 444 } 445 bucketRange = hm.buckets[bucketIndex][]; 446 if (bucketRange.empty) 447 bucketIndex++; 448 else 449 break; 450 } 451 } 452 453 Unqual!(MapType)* hm; 454 size_t bucketIndex; 455 typeof(hm.buckets[0].opSlice()) bucketRange; 456 bool _empty; 457 } 458 459 void initialize(size_t bucketCount = DEFAULT_BUCKET_COUNT) 460 { 461 import std.conv : emplace; 462 assert((bucketCount & (bucketCount - 1)) == 0, "bucketCount must be a power of two"); 463 464 buckets = makeArray!Bucket(allocator, bucketCount); 465 static if (useGC) 466 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 467 foreach (ref bucket; buckets) 468 { 469 static if (stateSize!Allocator == 0) 470 emplace(&bucket); 471 else 472 emplace(&bucket, allocator); 473 } 474 } 475 476 Node* insert(const K key, V value) 477 { 478 return insert(key, move(mutable(value)), hashFunction(key)); 479 } 480 481 Node* insert(const K key, V value, const Hash hash, const bool modifyLength = true) 482 { 483 if (buckets.length == 0) 484 initialize(); 485 immutable size_t index = hashToIndex(hash, buckets.length); 486 foreach (ref item; buckets[index]) 487 { 488 if (item.hash == hash && item.key == key) 489 { 490 item.value = move(mutable(value)); 491 return &item; 492 } 493 } 494 static if (storeHash) 495 Node node = Node(hash, cast(ContainerStorageType!K) key, move(mutable(value))); 496 else 497 Node node = Node(cast(ContainerStorageType!K) key, move(mutable(value))); 498 Node* n = buckets[index].insertAnywhere(move(node)); 499 if (modifyLength) 500 _length++; 501 if (shouldRehash()) 502 { 503 rehash(); 504 immutable newIndex = hashToIndex(hash, buckets.length); 505 foreach (ref item; buckets[newIndex]) 506 { 507 if (item.hash == hash && item.key == key) 508 return &item; 509 } 510 assert(false); 511 } 512 else 513 return n; 514 } 515 516 /** 517 * Returns: true if the load factor has been exceeded 518 */ 519 bool shouldRehash() const pure nothrow @safe @nogc 520 { 521 // We let this be greater than one because each bucket is an unrolled 522 // list that has more than one element per linked list node. 523 return (float(_length) / float(buckets.length)) > 1.33f; 524 } 525 526 /** 527 * Rehash the map. 528 */ 529 void rehash() @trusted 530 { 531 import std.conv : emplace; 532 immutable size_t newLength = buckets.length << 1; 533 immutable size_t newSize = newLength * Bucket.sizeof; 534 Bucket[] oldBuckets = buckets; 535 assert (oldBuckets.ptr == buckets.ptr); 536 buckets = cast(Bucket[]) allocator.allocate(newSize); 537 static if (useGC) 538 GC.addRange(buckets.ptr, buckets.length * Bucket.sizeof); 539 if (newLength) 540 assert (buckets); 541 assert (buckets.length == newLength); 542 foreach (ref bucket; buckets) 543 { 544 static if (stateSize!Allocator == 0) 545 emplace(&bucket); 546 else 547 emplace(&bucket, allocator); 548 } 549 550 foreach (ref bucket; oldBuckets) 551 { 552 foreach (ref node; bucket) 553 insert(cast(K) node.key, move(node.value), node.hash, false); 554 typeid(typeof(bucket)).destroy(&bucket); 555 } 556 static if (useGC) 557 GC.removeRange(oldBuckets.ptr); 558 allocator.deallocate(cast(void[]) oldBuckets); 559 } 560 561 inout(Node)* find(const K key, ref size_t index) inout 562 { 563 return find(key, index, hashFunction(key)); 564 } 565 566 inout(Node)* find(const K key, ref size_t index, const Hash hash) inout 567 { 568 import std.array : empty; 569 570 if (buckets.empty) 571 return null; 572 index = hashToIndex(hash, buckets.length); 573 foreach (ref r; buckets[index]) 574 { 575 if (r.hash == hash && r.key == key) 576 return cast(inout(Node)*) &r; 577 } 578 return null; 579 } 580 581 struct Node 582 { 583 bool opEquals(ref const K key) const 584 { 585 return key == this.key; 586 } 587 588 bool opEquals(ref const Node n) const 589 { 590 return this.hash == n.hash && this.key == n.key; 591 } 592 593 static if (storeHash) 594 Hash hash; 595 else 596 @property Hash hash() const { return hashFunction(key); } 597 598 ContainerStorageType!K key; 599 ContainerStorageType!V value; 600 } 601 602 alias Bucket = UnrolledList!(Node, Allocator, useGC); 603 Bucket[] buckets; 604 size_t _length; 605 } 606 607 /// 608 unittest 609 { 610 import std.uuid : randomUUID; 611 import std.range.primitives : walkLength; 612 613 auto hm = HashMap!(string, int)(16); 614 assert (hm.length == 0); 615 assert (!hm.remove("abc")); 616 hm["answer"] = 42; 617 assert (hm.length == 1); 618 assert ("answer" in hm); 619 assert (hm.containsKey("answer")); 620 hm.remove("answer"); 621 assert (hm.length == 0); 622 assert ("answer" !in hm); 623 assert (hm.get("answer", 1000) == 1000); 624 assert (!hm.containsKey("answer")); 625 hm["one"] = 1; 626 hm["one"] = 1; 627 assert (hm.length == 1); 628 assert (hm["one"] == 1); 629 hm["one"] = 2; 630 assert(hm["one"] == 2); 631 foreach (i; 0 .. 1000) 632 { 633 hm[randomUUID().toString] = i; 634 } 635 assert (hm.length == 1001); 636 assert (hm.keys().length == hm.length); 637 assert (hm.values().length == hm.length); 638 () @nogc { 639 assert (hm.byKey().walkLength == hm.length); 640 assert (hm.byValue().walkLength == hm.length); 641 assert (hm[].walkLength == hm.length); 642 assert (hm.byKeyValue().walkLength == hm.length); 643 }(); 644 foreach (v; hm) {} 645 646 auto hm2 = HashMap!(char, char)(4); 647 hm2['a'] = 'a'; 648 649 HashMap!(int, int) hm3; 650 assert (hm3.get(100, 20) == 20); 651 hm3[100] = 1; 652 assert (hm3.get(100, 20) == 1); 653 auto pValue = 100 in hm3; 654 assert(*pValue == 1); 655 } 656 657 version(emsi_containers_unittest) unittest 658 { 659 static class Foo 660 { 661 string name; 662 } 663 664 void someFunc(const scope ref HashMap!(string,Foo) map) @safe 665 { 666 foreach (kv; map.byKeyValue()) 667 { 668 assert (kv.key == "foo"); 669 assert (kv.value.name == "Foo"); 670 } 671 } 672 673 auto hm = HashMap!(string, Foo)(16); 674 auto f = new Foo; 675 f.name = "Foo"; 676 hm.insert("foo", f); 677 assert("foo" in hm); 678 } 679 680 // Issue #54 681 version(emsi_containers_unittest) unittest 682 { 683 HashMap!(string, int) map; 684 map.insert("foo", 0); 685 map.insert("bar", 0); 686 687 foreach (key; map.keys()) 688 map[key] = 1; 689 foreach (key; map.byKey()) 690 map[key] = 1; 691 692 foreach (value; map.byValue()) 693 assert(value == 1); 694 foreach (value; map.values()) 695 assert(value == 1); 696 } 697 698 version(emsi_containers_unittest) unittest 699 { 700 HashMap!(int, int, Mallocator, (int i) => i) map; 701 auto p = map.getOrAdd(1, 1); 702 assert(*p == 1); 703 *p = 2; 704 assert(map[1] == 2); 705 } 706 707 debug (EMSI_CONTAINERS) version(emsi_containers_unittest) unittest 708 { 709 import std.uuid : randomUUID; 710 import std.algorithm.iteration : walkLength; 711 import std.stdio; 712 713 auto hm = HashMap!(string, int)(16); 714 foreach (i; 0 .. 1_000_000) 715 { 716 auto str = randomUUID().toString; 717 //writeln("Inserting ", str); 718 hm[str] = i; 719 //if (i > 0 && i % 100 == 0) 720 //writeln(i); 721 } 722 writeln(hm.buckets.length); 723 724 import std.algorithm.sorting:sort; 725 ulong[ulong] counts; 726 foreach (i, ref bucket; hm.buckets[]) 727 counts[bucket.length]++; 728 foreach (k; counts.keys.sort()) 729 writeln(k, "=>", counts[k]); 730 } 731 732 // #74 733 version(emsi_containers_unittest) unittest 734 { 735 HashMap!(string, size_t) aa; 736 aa["b"] = 0; 737 ++aa["b"]; 738 assert(aa["b"] == 1); 739 } 740 741 // storeHash == false 742 version(emsi_containers_unittest) unittest 743 { 744 static struct S { size_t v; } 745 HashMap!(S, S, Mallocator, (S s) { return s.v; }, false, false) aa; 746 static assert(aa.Node.sizeof == 2 * S.sizeof); 747 } 748 749 version(emsi_containers_unittest) unittest 750 { 751 auto hm = HashMap!(string, int)(16); 752 753 foreach (v; hm) {} 754 foreach (ref v; hm) {} 755 foreach (int v; hm) {} 756 foreach (ref int v; hm) {} 757 foreach (const ref int v; hm) {} 758 759 foreach (k, v; hm) {} 760 foreach (k, ref v; hm) {} 761 foreach (k, int v; hm) {} 762 foreach (k, ref int v; hm) {} 763 foreach (k, const ref int v; hm) {} 764 765 foreach (ref k, v; hm) {} 766 foreach (ref k, ref v; hm) {} 767 foreach (ref k, int v; hm) {} 768 foreach (ref k, ref int v; hm) {} 769 foreach (ref k, const ref int v; hm) {} 770 771 foreach (const string k, v; hm) {} 772 foreach (const string k, ref v; hm) {} 773 foreach (const string k, int v; hm) {} 774 foreach (const string k, ref int v; hm) {} 775 foreach (const string k, const ref int v; hm) {} 776 777 foreach (const ref string k, v; hm) {} 778 foreach (const ref string k, ref v; hm) {} 779 foreach (const ref string k, int v; hm) {} 780 foreach (const ref string k, ref int v; hm) {} 781 foreach (const ref string k, const ref int v; hm) {} 782 783 hm["a"] = 1; 784 foreach (k, ref v; hm) { v++; } 785 assert(hm["a"] == 2); 786 } 787 788 version(emsi_containers_unittest) unittest 789 { 790 static struct S { @disable this(this); } 791 alias HM = HashMap!(int, S); 792 } 793 794 version(emsi_containers_unittest) unittest 795 { 796 struct S { int* a; } 797 alias HM = HashMap!(S, int); 798 }